// Copyright 2024 The Google Research Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Definition of a pair of two Wikipedia document objects.
// This proto is mainly for external data release.

syntax = "proto2";

package smith;

// Definition of a pair of two WikiDoc objects.
// NextID: 10
message WikiDocPair {
  // An id that uniquely identifies this document pair. The id can be generated
  // based on the urls of the two documents in the pair.
  optional string id = 1;

  // The classification label generated by a machine (i.e. not by a human
  // rater). Stored as an integer rather than a fixed set of named values so
  // that the number of graded levels of this label can be changed later.
  optional int32 machine_label_for_classification = 2;

  // The classification label generated by a human.
  // NOTE(review): presumably uses the same graded integer scale as
  // machine_label_for_classification -- confirm.
  optional int32 human_label_for_classification = 3;

  // The regression label generated by a machine.
  // NOTE(review): the value range of this score is not specified in this
  // file -- confirm against the data release documentation.
  optional float machine_label_for_regression = 4;

  // The regression label generated by a human.
  // NOTE(review): the value range of this score is not specified in this
  // file -- confirm against the data release documentation.
  optional float human_label_for_regression = 5;

  // The two documents that make up this pair. The label fields of this
  // message are the similarity labels for these two documents.
  optional WikiDoc doc_one = 6;
  optional WikiDoc doc_two = 7;

  // The similarity score predicted by a model for this pair.
  optional float model_prediction = 8;

  // The raw human rating scores. This field is repeated, so a pair can carry
  // any number of raw scores (including none).
  // NOTE(review): how these raw scores relate to
  // human_label_for_classification / human_label_for_regression is not
  // described in this file -- confirm.
  repeated int32 human_label = 9;
}

// Definition of the contents of a WikiDoc object.
// NextID: 7
message WikiDoc {
  // An id that uniquely identifies this document. The id can be generated
  // based on the url of the document.
  optional string id = 1;

  // The url of the WikiDoc page.
  optional string url = 2;

  // The title of the WikiDoc page.
  optional string title = 3;

  // The description of the WikiDoc page.
  optional string description = 4;

  // The contents of the WikiDoc page, split into sections. Each entry carries
  // the title and text of one section.
  // NOTE(review): presumably stored in the order the sections appear on the
  // page -- confirm.
  repeated Section section_contents = 5;

  // The ids of the images that appear in the WikiDoc page.
  // NOTE(review): the format of an image id is not specified in this file.
  repeated string image_ids = 6;
}

// Definition of sections in WikiDoc pages.
// NextID: 3
message Section {
  // The title of this section.
  optional string title = 1;

  // The text of this section.
  // NOTE(review): whether this is plain text or still contains wiki markup is
  // not specified in this file -- confirm.
  optional string text = 2;
}
